package org.swu.swuse.Pipeline;

import org.swu.swuse.dao.WebPageDao;
import org.swu.swuse.model.WebPage;
import org.swu.swuse.utils.SpringUtils;

import us.codecraft.webmagic.ResultItems;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.pipeline.Pipeline;

/**
 * WebMagic的Pipeline定制化实现<br >
 * 主要实现功能是实现WebPage模型的数据库持久化
 * 
 * @author zhanjingbo
 *
 */
public class DataBasePipeline implements Pipeline {

	private WebPageDao webPageDao;

	public DataBasePipeline() {
		webPageDao = SpringUtils.getBean(WebPageDao.class);
	}

	public void process(ResultItems resultItems, Task task) {
		WebPage webpage = resultItems.get("webpage");
		if (webpage == null) {
			return;
		}
		WebPage temp = webPageDao.getWebPageByUrl(webpage.getUrl());
		if (temp != null) {
			if (temp.getHtml().equals(webpage.getHtml())) {
				return;
			}
		}
		webPageDao.addWebPage(webpage);
	}

}
